import gzip
import socket
import urllib.error
import urllib.request
import zlib

def get_content2_utf8(url):
    """Fetch ``url`` through ``get_content2``, passing 'utf-8' as the encoding.

    Returns whatever ``get_content2`` returns for that URL.
    """
    utf8_text = get_content2(url, 'utf-8')
    return utf8_text

# Decode a raw response body into text, un-gzipping it first when needed.
def _decode_body(raw, encoding):
    """Return ``raw`` (bytes) as text, transparently handling gzip bodies.

    A body starting with the gzip magic number is decompressed. The
    decompressed bytes are decoded as UTF-8 first (the historical behaviour
    of this module) and, if that fails, with ``encoding``. Anything that is
    not valid gzip is decoded directly with ``encoding``.

    Raises:
        UnicodeDecodeError: if the bytes cannot be decoded.
    """
    if raw[:2] == b'\x1f\x8b':
        try:
            unpacked = gzip.decompress(raw)
        except (OSError, EOFError, zlib.error):
            # Only looked like gzip; fall through and treat it as plain bytes.
            unpacked = None
        if unpacked is not None:
            try:
                return unpacked.decode('utf-8')
            except UnicodeDecodeError:
                return unpacked.decode(encoding)
    return raw.decode(encoding)


# Fetch web page data, using urllib.request
def get_content2(url, encoding, timeout=20):
    """Download ``url`` and return the response body as a string.

    The request is sent with a browser-like User-Agent and an Accept header
    that prefers JSON. Gzip-compressed bodies are decompressed before
    decoding.

    Args:
        url: Address to fetch.
        encoding: Character encoding used to decode a non-gzip body (and as
            a fallback for gzip bodies that are not valid UTF-8).
        timeout: Seconds to wait on the connection before giving up. It is
            applied to this request only, rather than changing the
            process-wide socket default.

    Returns:
        The decoded body, or ``None`` if the resource could not be loaded
        (the URL and the error are printed in that case).

    Raises:
        ValueError: if ``url`` has no recognisable scheme.
        UnicodeDecodeError: if the body cannot be decoded.
    """
    headers = {
        'Accept': 'application/json, text/plain, */*',
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.75 Safari/537.36',
    }
    request = urllib.request.Request(url, headers=headers)

    try:
        # The context manager closes the response even if read() fails.
        with urllib.request.urlopen(request, timeout=timeout) as response:
            raw = response.read()
    except (urllib.error.URLError, OSError) as e:
        # URLError covers HTTP and protocol failures; OSError covers socket
        # timeouts and connection resets raised while reading.
        print("Error when load ", url)
        print(e)
        return None

    return _decode_body(raw, encoding)


if __name__ == '__main__':
    # Manual smoke run: fetch one search-result URL and print what came back.
    demo_url = "http://www.tianyancha.com/search/%E4%BA%A4%E6%98%93%E4%B8%AD%E5%BF%83.json?&pn=1&base=ln&city=%E8%90%A5%E5%8F%A3"
    print(get_content2_utf8(demo_url))

